# Import libraries
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer as sentiment_eng
from vaderSentiment_fr.vaderSentiment import SentimentIntensityAnalyzer as sentiment_fr

###############################################################################

### Data Preparation ###

# Raw tweet workbooks (change file paths). Only ONE dataset is analysed per
# run; pick it with DATASET below.
DATA_FILES = {
    # Data for 24 leaders
    'african-leaders': r'C:\Users\...\leaders-and-twitter_replication-materials\data\data-raw-tweets-african-leaders.xlsx',
    # Data for Trudeau language test
    'trudeau': r'C:\Users\...\leaders-and-twitter_replication-materials\data\data-raw-tweets-trudeau.xlsx',
}

# Previously both workbooks were read one after the other into the same
# variable, so the first read was thrown away and only the Trudeau data was
# ever used. The default below keeps that effective behaviour while skipping
# the discarded read; switch to 'african-leaders' to analyse the 24 leaders.
DATASET = 'trudeau'

tweets_df = pd.read_excel(DATA_FILES[DATASET])

# Clean data: keep tweets whose 'date' lies in the closed interval
# [2018-01-01, 2022-01-01] (both bounds inclusive, same comparison as
# `>= start` combined with `<= end`).
in_study_window = tweets_df['date'].between('2018-01-01', '2022-01-01')
tweets_df = tweets_df.loc[in_study_window]

###############################################################################

### Sentiment Modeling with VADER ###
def _add_vader_scores(frame, analyzer):
    """Return a copy of *frame* with VADER sentiment columns appended.

    Parameters
    ----------
    frame : pandas.DataFrame
        Tweets to score; must contain a 'text' column.
    analyzer : object
        Analyzer exposing ``polarity_scores(text)`` that returns a dict
        containing a 'compound' entry.

    Returns
    -------
    pandas.DataFrame
        Independent copy of *frame* with two extra columns:
        'sentiment_categories' (the full dict returned by the analyzer) and
        'sentiment_score' (that dict's 'compound' value).
    """
    # Work on an explicit copy: adding columns to a frame obtained through
    # boolean-mask indexing can trigger pandas' SettingWithCopyWarning.
    scored = frame.copy()
    scored['sentiment_categories'] = scored['text'].apply(analyzer.polarity_scores)
    scored['sentiment_score'] = scored['sentiment_categories'].apply(
        lambda score_dict: score_dict['compound']
    )
    return scored


# One analyzer instance per language, created once and reused for every tweet.
sent_eng = sentiment_eng()
sent_fr = sentiment_fr()

sentiment_df = tweets_df.copy(deep = True)

# Score English and French tweets with their language-specific analyzer.
# NOTE: `sentiment_fr` is rebound here from the imported analyzer class to a
# DataFrame (the analyzer instance `sent_fr` was already created above); the
# name is kept so anything relying on it downstream keeps working.
sentiment_en = _add_vader_scores(sentiment_df[sentiment_df['lang'] == 'en'], sent_eng)
sentiment_fr = _add_vader_scores(sentiment_df[sentiment_df['lang'] == 'fr'], sent_fr)

# Rows whose 'lang' is neither 'en' nor 'fr' are not scored and therefore do
# not appear in the exported workbook.
sentiment_df_final = pd.concat([sentiment_en, sentiment_fr])
sentiment_df_final.to_excel(r'C:\Users\...\leaders-and-twitter_replication-materials\data\data-replicated-sentiment-vader.xlsx')



